# A tibble: 6 × 12
site_id site_name lat long station_no designation basin region subbasin
<chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
1 AB Avery Brook 42.4 -72.7 01171000 little West… Mass West Br…
2 JB Jimmy Brook 42.4 -72.7 01171040 little West… Mass West Br…
3 MB Mitchell Bro… 42.4 -72.7 01171080 little West… Mass West Br…
4 OL Obear Brook … 42.4 -72.7 01171070 little West… Mass West Br…
5 SD Sanderson Br… 42.4 -72.7 01171010 little West… Mass West Br…
6 WL West Brook L… 42.4 -72.7 01171090 little West… Mass West Br…
# ℹ 3 more variables: source <chr>, area_sqmi <dbl>, elev_ft <dbl>
# Greater Yellowstone/Al-Chokhachy ----

# Directory holding the raw Al-Chokhachy data files; every file in it is read.
gya_dir <- "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/Raw data/Al-Chokhachy/Al-Chokhachy data files"
gyafiles <- list.files(gya_dir)

# Fail early with a clear message instead of trying to read ".../NA" when the
# directory is empty or the path is wrong.
if (length(gyafiles) == 0) {
  stop("No Al-Chokhachy data files found in: ", gya_dir, call. = FALSE)
}

# Read each file into its own tibble: parse the date, build a datetime stamped
# as MST, rescale discharge, and drop the "_JB" suffix from the reliability
# columns.
gyalist <- lapply(file.path(gya_dir, gyafiles), function(path) {
  read_csv(path) %>%
    mutate(
      date = mdy(date),
      datetime = ymd_hms(paste(date, time, sep = " "), tz = "MST"),
      # 35.314666212661 is the number of cubic feet in a cubic metre;
      # presumably raw discharge is in m^3/s and becomes cfs -- TODO confirm
      discharge = as.numeric(discharge) * 35.314666212661
    ) %>%
    rename(
      DischargeReliability = DischargeReliability_JB,
      TempReliability = TempReliability_JB
    )
})

# Stack all files, standardise column and site names, attach site metadata.
dat_gya <- bind_rows(gyalist) %>%
  select(
    datetime, depth, discharge, temperature, location,
    DischargeReliability, TempReliability
  ) %>%
  rename(
    height = depth,
    flow = discharge,
    tempc = temperature,
    site_name = location
  ) %>%
  filter(site_name != "EF Henrys") %>% # drop weird duplicate site/year?
  mutate(site_name = dplyr::recode(
    site_name,
    "EF Above Confluence" = "EF Duck Creek ab HF",
    "EF Below Confluence" = "EF Duck Creek be HF",
    "NF Spread Creek" = "NF Spread Creek Lower",
    "Upper NF Spread Creek" = "NF Spread Creek Upper",
    "SF Spread Creek" = "SF Spread Creek Lower",
    "Upper SF Spread Creek" = "SF Spread Creek Upper",
    "Shields River above Dugout Creek" = "Shields River ab Dugout",
    "Upper Leidy Creek" = "Leidy Creek Upper",
    "Leidy Creek" = "Leidy Creek Mouth",
    "Spread Creek" = "Spread Creek Dam",
    "Shields River above Smith Creek" = "Shields River Valley Ranch"
  )) %>%
  # site_name is the only column shared with siteinfo; naming the key keeps the
  # join from silently changing if either table gains a same-named column.
  left_join(siteinfo %>% filter(source == "ECOD"), by = "site_name") %>%
  # NOTE: this comparison also drops rows where tempc is NA.
  filter(tempc <= 100) %>%
  mutate(
    DischargeReliability = as.factor(DischargeReliability),
    TempReliability = as.factor(TempReliability)
  )

# set tz to local and convert to UTC
tz(dat_gya$datetime) <- "MST"
dat_gya$datetime <- with_tz(dat_gya$datetime, "UTC")
head(dat_gya)
# A tibble: 6 × 18
datetime height flow tempc site_name DischargeReliability
<dttm> <dbl> <dbl> <dbl> <chr> <fct>
1 2019-06-17 15:45:00 0.521 45.3 5.66 Brackett Creek 1
2 2019-06-17 16:45:00 0.551 50.4 6.27 Brackett Creek 1
3 2019-06-17 17:45:00 0.526 46.1 7.08 Brackett Creek 1
4 2019-06-17 18:45:00 0.533 47.3 7.88 Brackett Creek 1
5 2019-06-17 19:45:00 0.528 46.4 8.38 Brackett Creek 1
6 2019-06-17 20:45:00 0.53 46.8 9.77 Brackett Creek 1
# ℹ 12 more variables: TempReliability <fct>, site_id <chr>, lat <dbl>,
# long <dbl>, station_no <chr>, designation <chr>, basin <chr>, region <chr>,
# subbasin <chr>, source <chr>, area_sqmi <dbl>, elev_ft <dbl>
Bind EcoD hourly flow/temp data with siteinfo and write to file
Code
# Stack the hourly flow/temperature tables from all four regions into a single
# data frame; columns are matched by name.
dat <- list(dat_wb, dat_shen, dat_flat, dat_gya) %>%
  bind_rows()
2.2.2 Duplicates
Check for duplicates: for each site, count the number of distinct date/times that appear in more than one row. For the Duck Ck and Spread Ck sites, the duplicates are driven by errors in how the datetimes are coded/specified (see ReviewData.qmd).
Code
# For every site, count how many distinct timestamps occur on more than one
# row, and render the per-site counts as a table.
dat %>%
  group_by(site_name, datetime) %>%
  filter(n() > 1) %>%
  ungroup() %>%
  arrange(site_name, datetime) %>%
  group_by(site_name) %>%
  summarize(num_dups = n_distinct(datetime)) %>%
  kable()
# A tibble: 6 × 17
station_no datetime flowcfs flowcfs_appcd tz tempc tempc_appcd
<chr> <dttm> <dbl> <chr> <chr> <dbl> <chr>
1 01169900 1990-10-01 04:15:00 15 A [91] UTC NA <NA>
2 01169900 1990-10-01 04:30:00 16 A [91] UTC NA <NA>
3 01169900 1990-10-01 04:45:00 16 A [91] UTC NA <NA>
4 01169900 1990-10-01 05:00:00 16 A [91] UTC NA <NA>
5 01169900 1990-10-01 05:15:00 16 A [91] UTC NA <NA>
6 01169900 1990-10-01 05:30:00 16 A [91] UTC NA <NA>
# ℹ 10 more variables: site_id <chr>, site_name <chr>, lat <dbl>, long <dbl>,
# designation <chr>, basin <chr>, region <chr>, subbasin <chr>,
# area_sqmi <dbl>, elev_ft <dbl>
2.3.2 Check approval
A, A [91], A [92], A [93]: approved data; historical daily values approximately match the observed data
A e: approved estimated data, often during ice affected periods (smoothed data)
P: provisional data yet to be approved, typically these are just more recent observations
P e: provisional estimated data, often during ice affected periods
P Ice: ice affected observations
P Dis: provisional data yet to be approved; the site has been discontinued
NA: missing value, usually because temperature is observed but flow is not
Flow approval codes
Code
# List the distinct flow approval codes present in the sub-daily NWIS data.
unique(nwis_subdaily[["flowcfs_appcd"]])
[1] "A [91]" "A [92]" "A [93]" "A" "A e" "P" "P e" "P Ice"
[9] NA "P Dis"